{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Regression\n",
    "## Create the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "import torch\n",
    "from torch import nn, optim\n",
    "import math\n",
    "from IPython import display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from res.plot_lib import plot_data, plot_model, set_default\n",
    "from matplotlib import pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "set_default()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "seed = 1\n",
    "random.seed(seed)\n",
    "torch.manual_seed(seed)\n",
    "N = 1000  # num_samples_per_class\n",
    "D = 1  # dimensions\n",
    "C = 1  # num_classes\n",
    "H = 100  # num_hidden_units"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1).to(device)\n",
    "y = X.pow(3) + 0.3 * torch.rand(X.size()).to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Shapes:\")\n",
    "print(\"X:\", tuple(X.size()))\n",
    "print(\"y:\", tuple(y.size()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.scatter(X.cpu().numpy(), y.cpu().numpy())\n",
    "plt.axis('equal');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Linear model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "learning_rate = 1e-3\n",
    "lambda_l2 = 1e-5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# nn package to create our linear model\n",
    "# each Linear module has a weight and bias\n",
    "model = nn.Sequential(\n",
    "    nn.Linear(D, H),\n",
    "    nn.Linear(H, C)\n",
    ")\n",
    "model.to(device) # Convert to CUDA\n",
    "\n",
    "# nn package also has different loss functions.\n",
    "# we use MSE loss for our regression task\n",
    "criterion = torch.nn.MSELoss()\n",
    "\n",
    "# we use the optim package to apply\n",
    "# stochastic gradient descent for our parameter updates\n",
    "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=lambda_l2) # built-in L2\n",
    "\n",
    "# Training\n",
    "for t in range(1000):\n",
    "    \n",
    "    # Feed forward to get the logits\n",
    "    y_pred = model(X)\n",
    "    \n",
    "    # Compute the loss (MSE)\n",
    "    loss = criterion(y_pred, y)\n",
    "    print(\"[EPOCH]: %i, [LOSS or MSE]: %.6f\" % (t, loss.item()))\n",
    "    display.clear_output(wait=True)\n",
    "    \n",
    "    # zero the gradients before running\n",
    "    # the backward pass.\n",
    "    optimizer.zero_grad()\n",
    "    \n",
    "    # Backward pass to compute the gradient\n",
    "    # of loss w.r.t our learnable params. \n",
    "    loss.backward()\n",
    "    \n",
    "    # Update params\n",
    "    optimizer.step()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot trained model\n",
    "print(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.scatter(X.data.cpu().numpy(), y.data.cpu().numpy())\n",
    "plt.plot(X.data.cpu().numpy(), y_pred.data.cpu().numpy(), 'r-', lw=5)\n",
    "plt.axis('equal');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Two-layered network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "learning_rate = 1e-3\n",
    "lambda_l2 = 1e-5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Number of networks\n",
    "n_networks = 10\n",
    "models = list()\n",
    "y_pretrain = list()\n",
    "\n",
    "# nn package also has different loss functions.\n",
    "# we use MSE for a regression task\n",
    "criterion = torch.nn.MSELoss()\n",
    "\n",
    "for mod in range(n_networks):\n",
    "    # nn package to create our linear model\n",
    "    # each Linear module has a weight and bias\n",
    "    model = nn.Sequential(\n",
    "        nn.Linear(D, H),\n",
    "        nn.ReLU() if mod < n_networks // 2 else nn.Tanh(),\n",
    "        nn.Linear(H, C)\n",
    "    )\n",
    "    model.to(device)\n",
    "    \n",
    "    # Append models\n",
    "    models.append(model)\n",
    "\n",
    "    # we use the optim package to apply\n",
    "    # ADAM for our parameter updates\n",
    "    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=lambda_l2) # built-in L2\n",
    "\n",
    "    # e = 1.  # plotting purpose\n",
    "\n",
    "    # Training\n",
    "    for t in range(1000):\n",
    "\n",
    "        # Feed forward to get the logits\n",
    "        y_pred = model(X)\n",
    "        \n",
    "        # Append pre-train output\n",
    "        if t == 0:\n",
    "            y_pretrain.append(y_pred.detach())\n",
    "\n",
    "        # Compute the loss and accuracy\n",
    "        loss = criterion(y_pred, y)\n",
    "        print(f\"[MODEL]: {mod + 1}, [EPOCH]: {t}, [LOSS]: {loss.item():.6f}\")\n",
    "        display.clear_output(wait=True)\n",
    "\n",
    "        # zero the gradients before running\n",
    "        # the backward pass.\n",
    "        optimizer.zero_grad()\n",
    "\n",
    "        # Backward pass to compute the gradient\n",
    "        # of loss w.r.t our learnable params. \n",
    "        loss.backward()\n",
    "\n",
    "        # Update params\n",
    "        optimizer.step()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(models[0], models[-1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Predictions: Before Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for y_pretrain_idx in y_pretrain:\n",
    "    # New X that ranges from -5 to 5 instead of -1 to 1\n",
    "    X_new = torch.unsqueeze(torch.linspace(-2, 2, 100), dim=1)\n",
    "        \n",
    "    plt.plot(X_new.numpy(), y_pretrain_idx.cpu().numpy(), 'r-', lw=1)\n",
    "\n",
    "plt.scatter(X.cpu().numpy(), y.cpu().numpy(), label='data')\n",
    "plt.axis('square')\n",
    "plt.axis((-1.1, 1.1, -1.1, 1.1));\n",
    "y_combo = torch.stack(y_pretrain)\n",
    "plt.plot(X_new.numpy(), y_combo.var(dim=0).cpu().numpy(), 'g', label='variance');\n",
    "plt.legend()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Predictions: After Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = list()\n",
    "relu_models = models[:n_networks // 2]\n",
    "tanh_models = models[n_networks // 2:]\n",
    "plt.figure(figsize=(20, 10))\n",
    "\n",
    "def dense_prediction(models, non_linearity, zoom):\n",
    "    plt.subplot(1, 2, 1 if non_linearity == 'ReLU' else 2)\n",
    "    for model in models:\n",
    "        # New X that ranges from -5 to 5 instead of -1 to 1\n",
    "        X_new = torch.unsqueeze(torch.linspace(-4, 4, 1001), dim=1).to(device)\n",
    "\n",
    "        # Getting predictions from input\n",
    "        with torch.no_grad():\n",
    "            y_pred.append(model(X_new))\n",
    "\n",
    "        plt.plot(X_new.cpu().numpy(), y_pred[-1].cpu().numpy(), 'r-', lw=1)\n",
    "    plt.scatter(X.cpu().numpy(), y.cpu().numpy(), label='data')\n",
    "    plt.axis('square')\n",
    "    plt.axis(torch.tensor((-1.1, 1.1, -1.1, 1.1)) * zoom);\n",
    "    y_combo = torch.stack(y_pred)\n",
    "    plt.plot(X_new.cpu().numpy(), 10 * y_combo.var(dim=0).cpu().sqrt().numpy(), 'y', label='10 × std')\n",
    "    plt.plot(X_new.cpu().numpy(), 10 * y_combo.var(dim=0).cpu().numpy(), 'g', label='30 × variance')\n",
    "    plt.legend()\n",
    "    plt.title(non_linearity + ' models')\n",
    "\n",
    "z = 1  # try 1 or 4\n",
    "dense_prediction(relu_models, 'ReLU', zoom=z)\n",
    "dense_prediction(tanh_models, 'Tanh', zoom=z)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:dl-minicourse] *",
   "language": "python",
   "name": "conda-env-dl-minicourse-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
